Check that we consistently recover the early burst pattern across replicates and downsampling schemes

First, check that the three replicates for each downsampling scheme converged, with appropriate effective sample sizes (>200).

1 Rep1 check convergence

Code
#show ESS for individual replicates
#rep1
knitr::include_graphics("/Users/devonderaad/Desktop/ESS.r1.r1.png")

Code
#rep2
knitr::include_graphics("/Users/devonderaad/Desktop/ESS.r1.r2.png")

Code
#rep3
knitr::include_graphics("/Users/devonderaad/Desktop/ESS.r1.r3.png")

Code
#all three chains combined all converged on the same parameters
knitr::include_graphics("/Users/devonderaad/Desktop/ESS.r1.all.png")

Code
#check Gelman-Rubin diagnostic. If PSRF scores are near 1, then we are confident the chain has converged
library(stableGR)
Loading required package: mcmcse
Code
#rep1.r1
x<-read.table("~/Desktop/nmel.ceyx.rad/snapp/rep1.log", header=T)
xx<-as.matrix(x[454:2001,2:7])
stable.GR(xx)
Warning in mcse.multi(x, method = method, size = size): Estimated matrix not
positive definite. The chain might be highly correlated or very high
dimensional. Consider increasing the sample size. Using the default batch means
estimator as a substitute.
$psrf
       posterior       likelihood            prior           lambda 
               1                1                1                1 
treeHeightLogger        clockRate 
               1                1 

$mpsrf
[1] 0.9999996

$means
       posterior       likelihood            prior           lambda 
   -7.455267e+03    -7.439374e+03    -1.589381e+01     2.904598e-01 
treeHeightLogger        clockRate 
    4.186890e+00     1.424388e-03 

$n.eff
[1] 1550.067

$blather
[1] FALSE
Code
#rep1.r2
y<-read.table("~/Desktop/nmel.ceyx.rad/snapp/rep1.r2/rep1.log", header=T)
yy<-as.matrix(y[451:2001,2:7])
stable.GR(yy)
$psrf
       posterior       likelihood            prior           lambda 
               1                1                1                1 
treeHeightLogger        clockRate 
               1                1 

$mpsrf
[1] 1.000001

$means
       posterior       likelihood            prior           lambda 
   -7.455253e+03    -7.439378e+03    -1.587551e+01     2.899475e-01 
treeHeightLogger        clockRate 
    4.177791e+00     1.415933e-03 

$n.eff
[1] 1548.015

$blather
[1] FALSE
Code
#rep1.r3
z<-read.table("~/Desktop/nmel.ceyx.rad/snapp/rep1.r3/rep1.log", header=T)
zz<-as.matrix(z[452:2001,2:7])
stable.GR(zz)
Warning in mcse.multi(x, method = method, size = size): Estimated matrix not
positive definite. The chain might be highly correlated or very high
dimensional. Consider increasing the sample size. Using the default batch means
estimator as a substitute.
$psrf
       posterior       likelihood            prior           lambda 
       1.0000128        1.0000210        0.9999979        0.9999983 
treeHeightLogger        clockRate 
       1.0000075        1.0000354 

$mpsrf
[1] NaN

$means
       posterior       likelihood            prior           lambda 
   -7.455262e+03    -7.439317e+03    -1.594483e+01     2.887641e-01 
treeHeightLogger        clockRate 
    4.230101e+00     1.395628e-03 

$n.eff
[1] NaN

$blather
[1] FALSE

2 Rep2 check convergence

Code
#show ESS for individual replicates
#rep1
knitr::include_graphics("/Users/devonderaad/Desktop/ESS.r2.r1.png")

Code
#rep2
knitr::include_graphics("/Users/devonderaad/Desktop/ESS.r2.r2.png")

Code
#rep3
knitr::include_graphics("/Users/devonderaad/Desktop/ESS.r2.r3.png")

Code
#all three chains combined all converged on the same parameters
knitr::include_graphics("/Users/devonderaad/Desktop/ESS.r2.all.png")

Code
#check Gelman-Rubin diagnostic. If PSRF scores are near 1, then we are confident the chain has converged
#rep1.r1
x<-read.table("~/Desktop/nmel.ceyx.rad/snapp/rep2.log", header=T)
xx<-as.matrix(x[451:2001,2:7])
stable.GR(xx)
$psrf
       posterior       likelihood            prior           lambda 
               1                1                1                1 
treeHeightLogger        clockRate 
               1                1 

$mpsrf
[1] 1

$means
       posterior       likelihood            prior           lambda 
   -7.309323e+03    -7.293553e+03    -1.576971e+01     2.904698e-01 
treeHeightLogger        clockRate 
    4.210075e+00     1.311875e-03 

$n.eff
[1] 1550.661

$blather
[1] FALSE
Code
#rep1.r2
y<-read.table("~/Desktop/nmel.ceyx.rad/snapp/rep2.r2/rep2.log", header=T)
yy<-as.matrix(y[452:2001,2:7])
stable.GR(yy)
Warning in mcse.multi(x, method = method, size = size): Estimated matrix not
positive definite. The chain might be highly correlated or very high
dimensional. Consider increasing the sample size. Using the default batch means
estimator as a substitute.
$psrf
       posterior       likelihood            prior           lambda 
               1                1                1                1 
treeHeightLogger        clockRate 
               1                1 

$mpsrf
[1] 0.9999992

$means
       posterior       likelihood            prior           lambda 
   -7.309565e+03    -7.293690e+03    -1.587471e+01     2.905453e-01 
treeHeightLogger        clockRate 
    4.257202e+00     1.311205e-03 

$n.eff
[1] 1553.918

$blather
[1] FALSE
Code
#rep1.r3
z<-read.table("~/Desktop/nmel.ceyx.rad/snapp/rep2.r3/rep2.log", header=T)
zz<-as.matrix(z[453:2001,2:7])
stable.GR(zz)
$psrf
       posterior       likelihood            prior           lambda 
               1                1                1                1 
treeHeightLogger        clockRate 
               1                1 

$mpsrf
[1] 0.9999995

$means
       posterior       likelihood            prior           lambda 
   -7.309259e+03    -7.293441e+03    -1.581794e+01     2.954793e-01 
treeHeightLogger        clockRate 
    4.213386e+00     1.319406e-03 

$n.eff
[1] 1551.447

$blather
[1] FALSE

3 Rep3 check convergence

Code
#show ESS for individual replicates
#rep1
knitr::include_graphics("/Users/devonderaad/Desktop/ESS.r3.r1.png")

Code
#rep2
knitr::include_graphics("/Users/devonderaad/Desktop/ESS.r3.r2.png")

Code
#rep3
knitr::include_graphics("/Users/devonderaad/Desktop/ESS.r3.r3.png")

Code
#all three chains combined all converged on the same parameters
knitr::include_graphics("/Users/devonderaad/Desktop/ESS.r3.all.png")

Code
#check Gelman-Rubin diagnostic. If PSRF scores are near 1, then we are confident the chain has converged
#rep3.r1
x<-read.table("~/Desktop/nmel.ceyx.rad/snapp/rep3.log", header=T)
xx<-as.matrix(x[452:2001,2:7])
stable.GR(xx)
Warning in mcse.multi(x, method = method, size = size): Estimated matrix not
positive definite. The chain might be highly correlated or very high
dimensional. Consider increasing the sample size. Using the default batch means
estimator as a substitute.
$psrf
[1] 1.0000190 1.0000298 0.9999983 1.0000249 1.0000082 1.0000482

$mpsrf
[1] 1.000028

$means
       posterior       likelihood            prior           lambda 
   -7.247140e+03    -7.231401e+03    -1.573870e+01     2.878391e-01 
treeHeightLogger        clockRate 
    4.326736e+00     1.128160e-03 

$n.eff
[1] 1423.122

$blather
[1] FALSE
Code
#rep3.r2
y<-read.table("~/Desktop/nmel.ceyx.rad/snapp/rep3.r2/rep3.log", header=T)
yy<-as.matrix(y[452:2001,2:7])
stable.GR(yy)
Warning in mcse.multi(x, method = method, size = size): Estimated matrix not
positive definite. The chain might be highly correlated or very high
dimensional. Consider increasing the sample size. Using the default batch means
estimator as a substitute.
$psrf
       posterior       likelihood            prior           lambda 
               1                1                1                1 
treeHeightLogger        clockRate 
               1                1 

$mpsrf
[1] 0.9999998

$means
       posterior       likelihood            prior           lambda 
   -7.247219e+03    -7.231495e+03    -1.572384e+01     2.915658e-01 
treeHeightLogger        clockRate 
    4.351982e+00     1.125404e-03 

$n.eff
[1] 1551.188

$blather
[1] FALSE
Code
#rep3.r3
z<-read.table("~/Desktop/nmel.ceyx.rad/snapp/rep3.r3/rep3.log", header=T)
zz<-as.matrix(z[452:2001,2:7])
stable.GR(zz)
$psrf
       posterior       likelihood            prior           lambda 
        1.000010         1.000006         1.000011         1.000007 
treeHeightLogger        clockRate 
        1.000012         1.000039 

$mpsrf
[1] NaN

$means
       posterior       likelihood            prior           lambda 
   -7.247052e+03    -7.231307e+03    -1.574480e+01     2.912298e-01 
treeHeightLogger        clockRate 
    4.341180e+00     1.117864e-03 

$n.eff
[1] NaN

$blather
[1] FALSE

4 Check whether early burst is found in each replicate: Rep1(r1)

Code
#phytools approach
library(phytools)
Loading required package: ape
Loading required package: maps
Code
#read in the consensus tree
tree <- ape::read.nexus(file = "~/Desktop/nmel.ceyx.rad/snapp/rep1.con.tree")
#remove root so that only the diversification rate within the northern Melanesian clade is assessed
tree<-drop.tip(tree, "margarethae")

#set h = the height of the empirical tree
h<-max(nodeHeights(tree))
h
[1] 3.792403
Code
#set x as 1000 evenly spaced numbers between zero and the height of the input tree
x<-seq(0,h,by=h/1000)
#set birth rate equal to a log-linear rate, following a pure birth prediction
b<-(log(Ntip(tree))-log(2))/h
#simulate 1000 trees with the same height and tip number as our empirical tree, plus a log-linear diversification rate
trees<-pbtree(b=b, n=Ntip(tree), t=h,nsim=1000 ,method="direct", quiet=TRUE)

#calculate 95% CI for the 1000 trees simmed above under a pure birth/death model
object<-ltt95(trees,log=TRUE,plot=FALSE, mode = "mean")
#set up the plot
par(bty="l")
#plot background 95% CI based on simulated data
plot(object, labels=FALSE, shaded=TRUE, xaxis = "negative", bg="grey90")
axis(side = 1, at=seq(-4,0, by=.5))
#cover the mean line with a grey line
lines(x=object[,1]-max(object[,1]),y=object[,3], type="l", lwd=2, col="grey60")
#plot empirical LTT
obj2<-ltt(tree, plot=FALSE)
#flip x axis values to negative
obj2$times<-obj2$times-max(obj2$times)
#plot on top of existing plot
plot(obj2, add=TRUE, log.lineages=FALSE, col="black", lwd=4, lty=1)

5 Check whether early burst is found in each replicate: Rep1(r2)

Code
#read in the consensus tree
tree <- ape::read.nexus(file = "~/Desktop/nmel.ceyx.rad/snapp/rep1.r2/rep1.con.tree")
#remove root so that only the diversification rate within the northern Melanesian clade is assessed
tree<-drop.tip(tree, "margarethae")

#set h = the height of the empirical tree
h<-max(nodeHeights(tree))
h
[1] 3.788046
Code
#set x as 1000 evenly spaced numbers between zero and the height of the input tree
x<-seq(0,h,by=h/1000)
#set birth rate equal to a log-linear rate, following a pure birth prediction
b<-(log(Ntip(tree))-log(2))/h
#simulate 1000 trees with the same height and tip number as our empirical tree, plus a log-linear diversification rate
trees<-pbtree(b=b, n=Ntip(tree), t=h,nsim=1000 ,method="direct", quiet=TRUE)

#calculate 95% CI for the 1000 trees simmed above under a pure birth/death model
object<-ltt95(trees,log=TRUE,plot=FALSE, mode = "mean")
#set up the plot
par(bty="l")
#plot background 95% CI based on simulated data
plot(object, labels=FALSE, shaded=TRUE, xaxis = "negative", bg="grey90")
axis(side = 1, at=seq(-4,0, by=.5))
#cover the mean line with a grey line
lines(x=object[,1]-max(object[,1]),y=object[,3], type="l", lwd=2, col="grey60")
#plot empirical LTT
obj2<-ltt(tree, plot=FALSE)
#flip x axis values to negative
obj2$times<-obj2$times-max(obj2$times)
#plot on top of existing plot
plot(obj2, add=TRUE, log.lineages=FALSE, col="black", lwd=4, lty=1)

6 Check whether early burst is found in each replicate: Rep1(r3)

Code
#read in the consensus tree
tree <- ape::read.nexus(file = "~/Desktop/nmel.ceyx.rad/snapp/rep1.r3/rep1.con.tree")
#remove root so that only the diversification rate within the northern Melanesian clade is assessed
tree<-drop.tip(tree, "margarethae")

#set h = the height of the empirical tree
h<-max(nodeHeights(tree))
h
[1] 3.839191
Code
#set x as 1000 evenly spaced numbers between zero and the height of the input tree
x<-seq(0,h,by=h/1000)
#set birth rate equal to a log-linear rate, following a pure birth prediction
b<-(log(Ntip(tree))-log(2))/h
#simulate 1000 trees with the same height and tip number as our empirical tree, plus a log-linear diversification rate
trees<-pbtree(b=b, n=Ntip(tree), t=h,nsim=1000 ,method="direct", quiet=TRUE)

#calculate 95% CI for the 1000 trees simmed above under a pure birth/death model
object<-ltt95(trees,log=TRUE,plot=FALSE, mode = "mean")
#set up the plot
par(bty="l")
#plot background 95% CI based on simulated data
plot(object, labels=FALSE, shaded=TRUE, xaxis = "negative", bg="grey90")
axis(side = 1, at=seq(-4,0, by=.5))
#cover the mean line with a grey line
lines(x=object[,1]-max(object[,1]),y=object[,3], type="l", lwd=2, col="grey60")
#plot empirical LTT
obj2<-ltt(tree, plot=FALSE)
#flip x axis values to negative
obj2$times<-obj2$times-max(obj2$times)
#plot on top of existing plot
plot(obj2, add=TRUE, log.lineages=FALSE, col="black", lwd=4, lty=1)

7 Check whether early burst is found in each replicate: Rep2(r1)

Code
#read in the consensus tree
tree <- ape::read.nexus(file = "~/Desktop/nmel.ceyx.rad/snapp/rep2.con.tree")
#remove root so that only the diversification rate within the northern Melanesian clade is assessed
tree<-drop.tip(tree, "margarethae")

#set h = the height of the empirical tree
h<-max(nodeHeights(tree))
h
[1] 3.806352
Code
#set x as 1000 evenly spaced numbers between zero and the height of the input tree
x<-seq(0,h,by=h/1000)
#set birth rate equal to a log-linear rate, following a pure birth prediction
b<-(log(Ntip(tree))-log(2))/h
#simulate 1000 trees with the same height and tip number as our empirical tree, plus a log-linear diversification rate
trees<-pbtree(b=b, n=Ntip(tree), t=h,nsim=1000 ,method="direct", quiet=TRUE)

#calculate 95% CI for the 1000 trees simmed above under a pure birth/death model
object<-ltt95(trees,log=TRUE,plot=FALSE, mode = "mean")
#set up the plot
par(bty="l")
#plot background 95% CI based on simulated data
plot(object, labels=FALSE, shaded=TRUE, xaxis = "negative", bg="grey90")
axis(side = 1, at=seq(-4,0, by=.5))
#cover the mean line with a grey line
lines(x=object[,1]-max(object[,1]),y=object[,3], type="l", lwd=2, col="grey60")
#plot empirical LTT
obj2<-ltt(tree, plot=FALSE)
#flip x axis values to negative
obj2$times<-obj2$times-max(obj2$times)
#plot on top of existing plot
plot(obj2, add=TRUE, log.lineages=FALSE, col="black", lwd=4, lty=1)

8 Check whether early burst is found in each replicate: Rep2(r2)

Code
#read in the consensus tree
tree <- ape::read.nexus(file = "~/Desktop/nmel.ceyx.rad/snapp/rep2.r2/rep2.con.tree")
#remove root so that only the diversification rate within the northern Melanesian clade is assessed
tree<-drop.tip(tree, "margarethae")

#set h = the height of the empirical tree
h<-max(nodeHeights(tree))
h
[1] 3.839811
Code
#set x as 1000 evenly spaced numbers between zero and the height of the input tree
x<-seq(0,h,by=h/1000)
#set birth rate equal to a log-linear rate, following a pure birth prediction
b<-(log(Ntip(tree))-log(2))/h
#simulate 1000 trees with the same height and tip number as our empirical tree, plus a log-linear diversification rate
trees<-pbtree(b=b, n=Ntip(tree), t=h,nsim=1000 ,method="direct", quiet=TRUE)

#calculate 95% CI for the 1000 trees simmed above under a pure birth/death model
object<-ltt95(trees,log=TRUE,plot=FALSE, mode = "mean")
#set up the plot
par(bty="l")
#plot background 95% CI based on simulated data
plot(object, labels=FALSE, shaded=TRUE, xaxis = "negative", bg="grey90")
axis(side = 1, at=seq(-4,0, by=.5))
#cover the mean line with a grey line
lines(x=object[,1]-max(object[,1]),y=object[,3], type="l", lwd=2, col="grey60")
#plot empirical LTT
obj2<-ltt(tree, plot=FALSE)
#flip x axis values to negative
obj2$times<-obj2$times-max(obj2$times)
#plot on top of existing plot
plot(obj2, add=TRUE, log.lineages=FALSE, col="black", lwd=4, lty=1)

9 Check whether early burst is found in each replicate: Rep2(r3)

Code
#read in the consensus tree
tree <- ape::read.nexus(file = "~/Desktop/nmel.ceyx.rad/snapp/rep2.r3/rep2.con.tree")
#remove root so that only the diversification rate within the northern Melanesian clade is assessed
tree<-drop.tip(tree, "margarethae")

#set h = the height of the empirical tree
h<-max(nodeHeights(tree))
h
[1] 3.798614
Code
#set x as 1000 evenly spaced numbers between zero and the height of the input tree
x<-seq(0,h,by=h/1000)
#set birth rate equal to a log-linear rate, following a pure birth prediction
b<-(log(Ntip(tree))-log(2))/h
#simulate 1000 trees with the same height and tip number as our empirical tree, plus a log-linear diversification rate
trees<-pbtree(b=b, n=Ntip(tree), t=h,nsim=1000 ,method="direct", quiet=TRUE)

#calculate 95% CI for the 1000 trees simmed above under a pure birth/death model
object<-ltt95(trees,log=TRUE,plot=FALSE, mode = "mean")
#set up the plot
par(bty="l")
#plot background 95% CI based on simulated data
plot(object, labels=FALSE, shaded=TRUE, xaxis = "negative", bg="grey90")
axis(side = 1, at=seq(-4,0, by=.5))
#cover the mean line with a grey line
lines(x=object[,1]-max(object[,1]),y=object[,3], type="l", lwd=2, col="grey60")
#plot empirical LTT
obj2<-ltt(tree, plot=FALSE)
#flip x axis values to negative
obj2$times<-obj2$times-max(obj2$times)
#plot on top of existing plot
plot(obj2, add=TRUE, log.lineages=FALSE, col="black", lwd=4, lty=1)

10 Check whether early burst is found in each replicate: Rep3(r1)

Code
#read in the consensus tree
tree <- ape::read.nexus(file = "~/Desktop/nmel.ceyx.rad/snapp/rep3.consensus.tree")
#remove root so that only the diversification rate within the northern Melanesian clade is assessed
tree<-drop.tip(tree, "margarethae")

#set h = the height of the empirical tree
h<-max(nodeHeights(tree))
h
[1] 3.798202
Code
#set x as 1000 evenly spaced numbers between zero and the height of the input tree
x<-seq(0,h,by=h/1000)
#set birth rate equal to a log-linear rate, following a pure birth prediction
b<-(log(Ntip(tree))-log(2))/h
#simulate 1000 trees with the same height and tip number as our empirical tree, plus a log-linear diversification rate
trees<-pbtree(b=b, n=Ntip(tree), t=h,nsim=1000 ,method="direct", quiet=TRUE)

#calculate 95% CI for the 1000 trees simmed above under a pure birth/death model
object<-ltt95(trees,log=TRUE,plot=FALSE, mode = "mean")
#set up the plot
par(bty="l")
#plot background 95% CI based on simulated data
plot(object, labels=FALSE, shaded=TRUE, xaxis = "negative", bg="grey90")
axis(side = 1, at=seq(-4,0, by=.5))
#cover the mean line with a grey line
lines(x=object[,1]-max(object[,1]),y=object[,3], type="l", lwd=2, col="grey60")
#plot empirical LTT
obj2<-ltt(tree, plot=FALSE)
#flip x axis values to negative
obj2$times<-obj2$times-max(obj2$times)
#plot on top of existing plot
plot(obj2, add=TRUE, log.lineages=FALSE, col="black", lwd=4, lty=1)

11 Check whether early burst is found in each replicate: Rep3(r2)

Code
#read in the consensus tree
tree <- ape::read.nexus(file = "~/Desktop/nmel.ceyx.rad/snapp/rep3.r2/rep3.con.tree")
#remove root so that only the diversification rate within the northern Melanesian clade is assessed
tree<-drop.tip(tree, "margarethae")

#set h = the height of the empirical tree
h<-max(nodeHeights(tree))
h
[1] 3.82212
Code
#set x as 1000 evenly spaced numbers between zero and the height of the input tree
x<-seq(0,h,by=h/1000)
#set birth rate equal to a log-linear rate, following a pure birth prediction
b<-(log(Ntip(tree))-log(2))/h
#simulate 1000 trees with the same height and tip number as our empirical tree, plus a log-linear diversification rate
trees<-pbtree(b=b, n=Ntip(tree), t=h,nsim=1000 ,method="direct", quiet=TRUE)

#calculate 95% CI for the 1000 trees simmed above under a pure birth/death model
object<-ltt95(trees,log=TRUE,plot=FALSE, mode = "mean")
#set up the plot
par(bty="l")
#plot background 95% CI based on simulated data
plot(object, labels=FALSE, shaded=TRUE, xaxis = "negative", bg="grey90")
axis(side = 1, at=seq(-4,0, by=.5))
#cover the mean line with a grey line
lines(x=object[,1]-max(object[,1]),y=object[,3], type="l", lwd=2, col="grey60")
#plot empirical LTT
obj2<-ltt(tree, plot=FALSE)
#flip x axis values to negative
obj2$times<-obj2$times-max(obj2$times)
#plot on top of existing plot
plot(obj2, add=TRUE, log.lineages=FALSE, col="black", lwd=4, lty=1)

12 Check whether early burst is found in each replicate: Rep3(r3)

Code
#read in the consensus tree
tree <- ape::read.nexus(file = "~/Desktop/nmel.ceyx.rad/snapp/rep3.r3/rep3.con.tree")
#remove root so that only the diversification rate within the northern Melanesian clade is assessed
tree<-drop.tip(tree, "margarethae")

#set h = the height of the empirical tree
h<-max(nodeHeights(tree))
h
[1] 3.816228
Code
#set x as 1000 evenly spaced numbers between zero and the height of the input tree
x<-seq(0,h,by=h/1000)
#set birth rate equal to a log-linear rate, following a pure birth prediction
b<-(log(Ntip(tree))-log(2))/h
#simulate 1000 trees with the same height and tip number as our empirical tree, plus a log-linear diversification rate
trees<-pbtree(b=b, n=Ntip(tree), t=h,nsim=1000 ,method="direct", quiet=TRUE)

#calculate 95% CI for the 1000 trees simmed above under a pure birth/death model
object<-ltt95(trees,log=TRUE,plot=FALSE, mode = "mean")
#set up the plot
par(bty="l")
#plot background 95% CI based on simulated data
plot(object, labels=FALSE, shaded=TRUE, xaxis = "negative", bg="grey90")
axis(side = 1, at=seq(-4,0, by=.5))
#cover the mean line with a grey line
lines(x=object[,1]-max(object[,1]),y=object[,3], type="l", lwd=2, col="grey60")
#plot empirical LTT
obj2<-ltt(tree, plot=FALSE)
#flip x axis values to negative
obj2$times<-obj2$times-max(obj2$times)
#plot on top of existing plot
plot(obj2, add=TRUE, log.lineages=FALSE, col="black", lwd=4, lty=1)